library(tidyverse)
library(lubridate)
library(themebg)
# Directory that holds the daily report CSVs.
data_dir <- "csse_covid_19_data/csse_covid_19_daily_reports"

# Disabled exploratory code, kept for reference: reading every daily report in
# `data_dir`, and reading individual days by hand.
# f <- list.files(data_dir, pattern = "csv", full.names = TRUE)
# x <- map(f, read_csv)
# raw_daily <- map_df(f, read_csv)
# day1 <- read_csv(file.path(data_dir, "01-22-2020.csv"))
# yest <- read_csv(file.path(data_dir, "03-22-2020.csv"))

# Daily report file 03-24-2020.csv. The path is built from `data_dir` instead
# of repeating the directory literal, so the location is defined in one place.
curr <- read_csv(file.path(data_dir, "03-24-2020.csv"))
# Directory that holds the global time-series CSVs.
ts_dir <- "csse_covid_19_data/csse_covid_19_time_series"

# Read one wide time-series CSV and reshape it to long format.
#
# The file must contain the columns `Province/State`, `Country/Region`, `Lat`
# and `Long`; every other column is treated as one reporting day whose header
# is a date written as month/day/two-digit year.
#
# Arguments:
#   path        Path to the CSV file.
#   value_name  Name (string) of the output column that receives the counts.
#
# Returns a tibble with the columns state, country, lat, long, date (Date) and
# the column named by `value_name`, one row per location and day.
read_time_series <- function(path, value_name) {
  read_csv(path) %>%
    rename(
      state = `Province/State`,
      country = `Country/Region`,
      lat = Lat,
      long = Long
    ) %>%
    pivot_longer(
      cols = c(-state, -country, -lat, -long),
      names_to = "date",
      values_to = value_name
    ) %>%
    # The former column headers are strings, so they are parsed with an
    # explicit format. No `tz` is passed: the result is a plain Date, and
    # recent lubridate releases appear to ignore `tz` for character input
    # (with a warning) -- TODO confirm against the installed version.
    mutate(date = as_date(date, format = "%m/%d/%y"))
}

# Long-format series, one per reported measure.
df_confirmed <- read_time_series(
  file.path(ts_dir, "time_series_covid19_confirmed_global.csv"),
  "confirmed"
)
df_deaths <- read_time_series(
  file.path(ts_dir, "time_series_covid19_deaths_global.csv"),
  "deaths"
)
df_recovered <- read_time_series(
  file.path(ts_dir, "time_series_covid19_recovered_global.csv"),
  "recovered"
)
# Combine the three long-format series into one table that carries the
# confirmed, deaths and recovered columns side by side.
#
# The join keys are spelled out instead of relying on the implicit natural
# join; they are exactly the columns the three tables have in common, so the
# result is unchanged and the "Joining, by = ..." message disappears.
# NOTE(review): lat/long are part of the key, so a location whose coordinates
# differ between files will not match and ends up with NA counts -- confirm
# that this is intended.
join_keys <- c("state", "country", "lat", "long", "date")
df_ts <- df_confirmed %>%
  left_join(df_deaths, by = join_keys) %>%
  left_join(df_recovered, by = join_keys)
# Country-level daily totals.
#
# All rows of a country (its states/provinces) are summed per day, ignoring
# missing counts. `active` is what remains after subtracting deaths and
# recoveries from the confirmed total. The day-over-day changes are computed
# within each country, so the first row of every country is NA.
# The result stays grouped by country; later steps inherit that grouping.
df_country <- df_ts %>%
  group_by(country, date) %>%
  summarize_at(vars(confirmed, deaths, recovered), sum, na.rm = TRUE) %>%
  ungroup() %>%
  mutate(active = confirmed - deaths - recovered) %>%
  group_by(country) %>%
  mutate(
    new_cases = confirmed - lag(confirmed, n = 1L),
    new_deaths = deaths - lag(deaths, n = 1L)
  )
# For every country, find the earliest date on which the cumulative confirmed
# count was strictly greater than `threshold`.
#
# Arguments:
#   df         Table with at least the columns country, date and confirmed.
#   threshold  Number of confirmed cases that has to be exceeded.
#
# Returns a tibble grouped by country with the columns country and date.
# Countries that never exceed the threshold are absent from the result.
first_date_over <- function(df, threshold) {
  df %>%
    group_by(country) %>%
    # Sort chronologically so that distinct() keeps the earliest qualifying
    # row of each country.
    arrange(date, country) %>%
    filter(confirmed > threshold) %>%
    distinct(country, .keep_all = TRUE) %>%
    select(country, date)
}

# Date of the first confirmed case per country.
df_case1 <- df_country %>%
  first_date_over(0) %>%
  rename(date_case1 = date)

# Date on which each country first had more than 50 confirmed cases.
df_case50 <- df_country %>%
  first_date_over(50) %>%
  rename(date_case50 = date)
# Main analysis table: the country-level daily totals plus, for every row,
# the number of days elapsed since the country's first case and since it
# passed 50 cases.
#
# Both lookups share only the `country` column with the left-hand table, so
# the join key is stated explicitly. Countries missing from a lookup get NA
# dates and therefore NA day counts.
df_data <- df_country %>%
  left_join(df_case1, by = "country") %>%
  left_join(df_case50, by = "country") %>%
  mutate(
    # Elapsed days as plain numbers (negative before the milestone date).
    day_case1 = as.numeric(difftime(date, date_case1, units = "days")),
    day_case50 = as.numeric(difftime(date, date_case50, units = "days")),
    # Flag used by the ggplot charts to map line size.
    usa = country == "US",
    # Date as text; used as the animation frame of the map.
    str_date = as.character(date)
  )
# Most recent row of every country. Sorting by descending date first makes
# distinct() keep the latest observation per country. Computed once and
# shared by both rankings below.
df_latest <- df_data %>%
  arrange(desc(date), country) %>%
  distinct(country, .keep_all = TRUE) %>%
  ungroup()

# Countries with the highest confirmed totals on their latest date.
# top_n() keeps ties, so a ranking can hold more rows than requested.
df_top25 <- top_n(df_latest, 25, confirmed)
df_top12 <- top_n(df_latest, 12, confirmed)
# Plotting subsets of `df_data`.

# Calendar-time view: only rows on which a country already has cases.
df_plt <- df_data %>%
  filter(confirmed > 0)

# Top-12 countries with the death rate in percent of confirmed cases.
# The division is elementwise, so no per-row grouping is needed; `confirmed`
# is always positive here because of the filter above. The result is
# ungrouped.
df_plt_top12 <- df_plt %>%
  semi_join(df_top12, by = "country") %>%
  mutate(death_rate = deaths / confirmed * 100) %>%
  ungroup()

# Views aligned on the day of each country's first confirmed case.
df_plt_day1 <- df_data %>%
  filter(day_case1 >= 0)
df_plt_day1_top25 <- df_plt_day1 %>%
  semi_join(df_top25, by = "country")
df_plt_day1_top12 <- df_plt_day1 %>%
  semi_join(df_top12, by = "country")

# Views aligned on the day each country passed 50 confirmed cases.
df_plt_day50 <- df_data %>%
  filter(day_case50 >= 0)
df_plt_day50_top25 <- df_plt_day50 %>%
  semi_join(df_top25, by = "country")
library(plotly)
# Draw an interactive plotly line chart with one coloured trace per country.
#
# Arguments:
#   df  Table that contains a `country` column and the columns referred to by
#       `x` and `y`.
#   x   One-sided formula naming the x-axis column, e.g. ~date.
#   y   One-sided formula naming the y-axis column, e.g. ~confirmed.
#
# Returns the plotly object. Traces use the "Paired" palette, show the country
# as hover text, and the legend is hidden.
plot_country_lines <- function(df, x, y) {
  df %>%
    plot_ly(x = x, y = y, color = ~country, colors = "Paired") %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)
}

# Confirmed cases by country
plot_country_lines(df_plt_top12, ~date, ~confirmed)

# Deaths by country
plot_country_lines(df_plt_top12, ~date, ~deaths)

# Death rate by country
plot_country_lines(df_plt_top12, ~date, ~death_rate)

# Confirmed cases since first case reported in each country
plot_country_lines(df_plt_day1_top12, ~day_case1, ~confirmed)

# Deaths since first case reported in each country
plot_country_lines(df_plt_day1_top12, ~day_case1, ~deaths)

# New cases reported each day by country
plot_country_lines(df_plt_day1_top12, ~day_case1, ~new_cases)

# New deaths reported each day by country
plot_country_lines(df_plt_day1_top12, ~day_case1, ~new_deaths)
# Smoothed daily new cases against days since each country's first case, one
# curve per top-12 country and without confidence bands. Line size is mapped
# to the `usa` flag over the range 0 to 1.5, presumably so that the US curve
# stands out. Legends are hidden.
ggplot(
  data = df_plt_day1_top12,
  mapping = aes(x = day_case1, y = new_cases, color = country, size = usa)
) +
  geom_smooth(se = FALSE) +
  scale_color_brewer(name = NULL, palette = "Paired") +
  scale_size_discrete(name = NULL, range = c(0, 1.5)) +
  theme_bg() +
  theme(legend.position = "none")
# Smoothed daily new deaths against days since each country's first case, one
# curve per top-12 country and without confidence bands. Line size is mapped
# to the `usa` flag over the range 0 to 1.5, presumably so that the US curve
# stands out. Legends are hidden.
ggplot(
  data = df_plt_day1_top12,
  mapping = aes(x = day_case1, y = new_deaths, color = country, size = usa)
) +
  geom_smooth(se = FALSE) +
  scale_color_brewer(name = NULL, palette = "Paired") +
  scale_size_discrete(name = NULL, range = c(0, 1.5)) +
  theme_bg() +
  theme(legend.position = "none")
# Animated map of the top-12 countries: markers are placed by country name,
# sized by the confirmed count, and animated with one frame per date string.
# The map uses the "natural earth" projection and no legend.
plot_ly(
  data = df_plt_top12,
  type = "scattergeo",
  locationmode = "country names",
  locations = ~country,
  size = ~confirmed,
  frame = ~str_date,
  showlegend = FALSE
) %>%
  layout(
    geo = list(
      projection = list(type = "natural earth")
    )
  )
# Confirmed cases by country over time